第 1 章 Plots
1.1 箱线图/小提琴图
Code
Code
# Violin + box plot of fuel economy (mpg) by engine shape (vs), split by
# transmission type (am). Both grouping columns are converted to factors so
# ggplot2 treats them as discrete variables.
p1 <- mtcars %>%
  mutate(
    am = factor(am),
    vs = factor(vs)
  ) %>%
  ggplot(aes(x = vs, y = mpg, fill = am)) +
  geom_violin(col = "white", trim = FALSE) +
  # Dodge the boxes explicitly so they stay aligned with the dodged violins.
  geom_boxplot(width = 0.3, position = position_dodge(width = 0.9)) +
  theme_bw() +
  theme(legend.position = c(0.15, 0.85)) +
  guides(alpha = "none") +
  # Labels describe the variables that are actually plotted (the previous
  # text was left over from an unrelated example).
  labs(
    x = "Engine shape (vs)",
    y = "Miles per gallon (mpg)",
    title = "Fuel economy by engine shape and transmission"
  ) +
  scale_fill_brewer(palette = "Set2")
p1
1.2 柱状图
Code
# Proportional stacked bar chart: share of each cylinder count within each
# level of vs, with one letter label per (vs, cyl) combination.
mtcars %>%
  count(vs, cyl) %>%
  # Derive the labels from the number of rows instead of hard-coding five
  # letters, so the chunk keeps working if the number of combinations changes.
  mutate(name = letters[seq_len(n())]) %>%
  mutate(across(c(vs, cyl), as.factor)) %>%
  ggplot(aes(x = vs, y = n, fill = cyl, label = name)) +
  geom_bar(stat = "identity", position = "fill", col = 1) +
  # position_fill(vjust = 0.5) places each label in the middle of its bar
  # segment; plain position = "fill" puts it on the segment's upper edge.
  geom_text(size = 4, position = position_fill(vjust = 0.5)) +
  scale_fill_brewer(palette = "Set2") +
  # scale_fill_manual(values = heat.colors(7)) +
  # scale_fill_manual(values = terrain.colors(7)) +
  labs(title = "Facebook theme", caption = "made by chz") +
  theme_minimal() +
  theme(
    plot.title = element_text(
      hjust = 0.5, # centre the title horizontally
      vjust = 0, # vertical adjustment
      color = "red",
      face = "italic"
    )
  )
1.3 点线图
Code
# Simulated data: ten subjects (a-j), each with a health score drawn from
# N(10, 1) at two time points ("a" and "n").
dd <- tibble(
  name = rep(letters[1:10], 2),
  health = rnorm(20, 10, 1),
  time = rep(c("a", "n"), each = 10)
)

# One horizontal segment per subject joining its two scores; the points are
# filled by time point.
ggplot(dd, aes(x = health, y = name)) +
  geom_line(aes(group = name)) +
  geom_point(aes(fill = time), shape = 21, size = 1) +
  labs(title = "Changes in health on time", x = "health", y = "name") +
  theme(axis.text.y = element_text(size = 5))
1.4 树图
Code

Code
# `place` controls where a label sits relative to the edges of its tile, and
# `grow` controls whether the label scales with the tile size (roughly
# proportionally).
# Secondary grouping (subgroups):
ggplot(
  G20,
  aes(area = gdp_mil_usd, fill = hdi, label = country, subgroup = region)
) +
  geom_treemap() +
  geom_treemap_subgroup_border() +
  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned.
  geom_treemap_subgroup_text(
    place = "centre", grow = TRUE,
    alpha = 0.8, colour = "black", fontface = "italic", min.size = 0
  ) +
  geom_treemap_text(
    colour = "red", place = "topleft", reflow = TRUE, alpha = 0.5
  ) +
  scale_fill_distiller(palette = "Reds")
1.6 自定义
Code
library(magick)
library(grid)
library(ggplot2)
# install.packages("palmerpenguins")
library(palmerpenguins)
# Body mass by penguin species: a translucent violin, a narrow box plot and
# jittered raw points layered on top of each other. The plot is stored in `p`
# and not printed here.
p <- ggplot(penguins, aes(x = species, y = body_mass_g)) +
  geom_violin(width = 0.5, cex = 1.2, aes(fill = species), alpha = 0.5) +
  geom_boxplot(width = 0.1, cex = 1.2) +
  geom_jitter(width = 0.2) +
  scale_y_continuous(limits = c(2500, 8000)) +
  theme_classic(base_size = 20) +
  scale_fill_manual(values = c("darkorange", "purple", "cyan4"))
Code

Code
library(png) #读取.png图片
library(jpeg) #读取jpeg图片
library(grid)
library(ggimage) #ggplot2扩展包,配合ggplot2绘图
# Sample a heart-shaped parametric curve and rescale both coordinates to
# [0, 1] (min-max scaling) so they can be used as relative plot positions.
t <- seq(0, 2 * pi, by = 0.2)
x <- 16 * sin(t)^3
y <- 13 * cos(t) - 5 * cos(2 * t) - 2 * cos(3 * t) - cos(4 * t)
a <- (x - min(x)) / diff(range(x))
b <- (y - min(y)) / diff(range(y))
# Background picture for the panel.
bg_img <- image_read("www/xg1.png")

# One row per point of the rescaled curve (a, b). tibble() replaces the
# deprecated data_frame().
bees <- tibble(x = a, y = b)

# Alternate the two icons with gaps (NA) along the curve. length.out ties the
# pattern to the number of rows instead of relying on a hard-coded repeat
# count that only matches by coincidence.
bees$image <- rep(
  c("www/lp_lh1.png", NA, "www/lh_sm.png", NA),
  length.out = nrow(bees)
)

ggplot(data = bees, aes(x = x, y = y)) +
  theme_bw(base_size = 20) +
  # Stretch the background image over the whole panel.
  annotation_custom(
    rasterGrob(
      bg_img,
      width = unit(1, "npc"),
      height = unit(1, "npc")
    ),
    -Inf, Inf, -Inf, Inf
  ) +
  geom_image(aes(image = image), size = 0.1)
1.8 3D玫瑰
http://www.rebeccabarter.com/blog/2017-04-20-interactive/ https://davidgohel.github.io/ggiraph/index.html
Code
# Build the coordinate matrices (xx, yy, zz) of a parametric rose surface.
# x runs over 25 values in [0, 1]; t runs over 1151 angles from 4*pi to 24*pi.
x <- (0:24) / 24
t <- seq(0, 575, by = 0.5) / 575 * 20 * pi + 4 * pi

# Every (x, t) combination, reshaped so that each matrix row holds one value
# of t and each column one value of x.
grid <- expand.grid(x = x, t = t)
x <- matrix(grid$x, ncol = 25, byrow = TRUE)
t <- matrix(grid$t, ncol = 25, byrow = TRUE)

# Angle p shrinks exponentially as t grows.
p <- (pi / 2) * exp(-t / (8 * pi))
# Small high-frequency ripple added to u below.
change <- sin(15 * t) / 150
y <- 2 * (x^2 - x)^2 * sin(p)
u <- 1 - (1 - (3.6 * t) %% (2 * pi) / pi)^4 / 2 + change
r <- u * (x * sin(p) + y * cos(p))

# Polar (r, t) to Cartesian coordinates, plus the height zz.
xx <- r * cos(t)
yy <- r * sin(t)
zz <- u * (x * cos(p) - y * sin(p))
# Draw the rose surface, then add a green "stem" (a short vertical line below
# the origin) and a caption above the flower.
plot <- plot_ly(
  x = ~xx, y = ~yy, z = ~zz,
  color = ~zz, colors = "Reds", opacity = 0.5
) %>%
  add_surface()

add_trace(
  plot,
  x = rep(0, 4), y = rep(0, 4), z = seq(-0.5, 0, length = 4),
  # State the trace type explicitly instead of letting plotly guess it.
  type = "scatter3d", mode = "lines",
  line = list(color = "green", width = 8)
) %>%
  # The font settings must be passed by name (textfont); as an unnamed
  # argument the list is not applied to the text.
  add_text(
    x = 0, y = 0, z = 1, text = "plot by chz",
    textfont = list(color = "green", size = 8)
  )
1.9 流程图
Code
library(DiagrammeR)
# Research-route flowchart rendered from a Graphviz DOT specification.
# Lines starting with '#' inside the string are part of the DOT source, not
# R comments.
grViz("
digraph {
# initiate graph
graph [layout = dot, rankdir = LR, label = '研究路线\n\n',labelloc = t]
# global node settings
node [shape = rectangle, style = filled, fillcolor = Linen]
A[label = '数据', shape = folder, fillcolor = Beige]
B[label = '预处理-\n选取,整合变量']
C[label = '欠采样\n 类别不平衡样本']
D[label = '朴素贝叶斯']
E[label = '逻辑回归']
F[label = '神经网络']
G[label= 'gbm梯度提升']
H[label= 'gbm提升模型\n参数优化']
P[label= '1.准确率 \n 2.重要性 \n 3.ROC曲线']
MOD[label= '最终模型',fillcolor = Beige]
blank1[label = '', width = 0.01, height = 0.01]
# A -> blank1[dir=none];
# blank1 -> B[minlen=10];
# {{ rank = same; blank1 B }}
# blank1 -> C
# blank2[label = '', width = 0.01, height = 0.01]
# C -> blank2[dir=none];
# blank2 -> D[minlen=1];
# {{ rank = same; blank2 E }}
# blank2 -> E [minlen=10]
A->B
{{ rank = same; A B }}
B->C
C->{D,E,F,G}
{D,E,F,G}->P
subgraph cluster_modules {
label = '模型构建'
color = red
style = dashed
# connect moderator to module 4
{D,E,F,G}
}
P->H
subgraph cluster_moderator {
label = '模型评估'
color = red
style = dashed
P}
H->MOD
{{ rank = same;H MOD }}
}
")
现在我们可以试试 bookdown 的一些初级功能了,例如图表。图 1.1 是一幅无趣的散点图,表 1.1 是一份枯燥的数据。
图 1.1: 雷猴啊,散点图!
| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|---|---|---|---|---|
| 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 5.0 | 3.6 | 1.4 | 0.2 | setosa |
| 5.4 | 3.9 | 1.7 | 0.4 | setosa |
1.10 地图
leaflet
1.10.1 天心区
Code
# Four random marker positions within +/-0.25 degrees of (112.99, 28.11),
# each tagged as "pirate" or "ship". The factor levels are ordered
# ("ship", "pirate").
df <- sp::SpatialPointsDataFrame(
  coords = cbind(
    runif(4, -0.5, 0.5) / 2 + 112.99, # lng
    runif(4, -0.5, 0.5) / 2 + 28.11 # lat
  ),
  data = data.frame(
    type = factor(
      rep(c("pirate", "ship"), 2),
      levels = c("ship", "pirate")
    )
  )
)
# Marker icons, one per value of `type`; both are drawn at 50 x 50 pixels.
oceanIcons <- iconList(
  ship = makeIcon(
    iconUrl = "www/lp_lh1.png",
    iconWidth = 50, iconHeight = 50
  ),
  pirate = makeIcon(
    iconUrl = "www/lp_lh2.png",
    iconWidth = 50, iconHeight = 50
  )
)
Code
# Leaflet map centred on (112.99, 28.11). Each overlay lives in its own
# group ("1" to "5") so it can be toggled from the layers control.
m <- leaflet() %>%
  addTiles(group = "OSM (default)") %>%
  setView(112.99, 28.11, zoom = 10) %>%
  addMarkers(
    112.99, 28.11,
    popup = "The birthplace of R",
    group = "1"
  ) %>%
  addCircleMarkers(
    112.99, 28.11,
    radius = 10, color = "red",
    group = "2"
  ) %>%
  addCircles(
    112.99, 28.11,
    weight = 3, radius = 10000, color = "red",
    group = "3"
  ) %>%
  addRectangles(
    lng1 = 113.2, lat1 = 28.3, lng2 = 112.8, lat2 = 27.9,
    fillColor = "yellow",
    group = "4"
  ) %>%
  # Icon markers from `df`, clustered.
  addMarkers(
    data = df,
    icon = ~ oceanIcons[type],
    clusterOptions = markerClusterOptions(),
    group = "5"
  ) %>%
  addLayersControl(
    baseGroups = c("OSM (default)"),
    overlayGroups = c("1", "2", "3", "4", "5"),
    # TRUE is spelled out: T is an ordinary variable that can be reassigned.
    options = layersControlOptions(collapsed = TRUE, autoZIndex = TRUE)
  )
m
1.11 ggtree
- `rtree()` is a simple and convenient way to generate a tree.
- `ggtree()` is used to visualize the resulting tree.
This is the most basic diagram, containing only structural information: the root node is on the left, and the children are on the right.

1.11.1 More forms of phylogenetic tree
Arguments in function
Its arguments can be divided into three categories: data arguments, plotting arguments, and theme arguments. Here, I pick a few important parameters:
- `tree`: A required argument that specifies the evolutionary tree object.
- `layout`: An optional argument that specifies the layout of the evolutionary tree.
- `branch.length`: An optional argument that specifies whether to display the branch lengths.
- `layout.legend.position`: An optional argument that specifies the position of the legend. The default is “right”.
- `geom_tiplab()` for adding taxa labels.
- `geom_nodepoint()`, `geom_tippoint()` for adding nodes.
Try to change some arguments
The ggtree package inherits the advantages of ggplot2. Users can change the color, size, and type of the lines as we do with ggplot2.




We can also specify the layout style of the tree.
Code

Displaying nodes and labels: the labels are shown on the nodes, and each label represents an individual.
Code

Code

1.11.2 run with another dataset
In Yu’s book, he introduced ggtree for Phylogenetic Tree Objects. I chose to study iris data from R built-in data.
Data:iris
This famous (Fisher’s or Anderson’s) iris data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica.(from help documentation of iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## Species
## 1 setosa
## 2 setosa
## 3 setosa
The data contain 150 flowers, with four characteristic variables and one indicator of the flower category (species).
## [1] 150 5
hclust: Hierarchical clustering successively merges samples according to the distance between them; its result is similar to the dendrogram in our study.
Code
##
## Call:
## hclust(d = .)
##
## Cluster method : complete
## Distance : euclidean
## Number of objects: 150
Code
## 'dendrogram' with 2 branches and 150 members total, at height 7.085
Code
# The results are divided into three categories, considering that we have three species
# Cut the tree into three clusters, since the data contain three species.
clus <- cutree(hc, 3)
# Sample indices grouped by cluster; seq_along() is also safe for empty input.
g <- split(seq_along(clus), clus)
# Plot a simple tree with ggtree.
p <- ggtree(hc, size = 0.5, linetype = 6)
# Most recent common ancestor of each cluster's samples.
# NOTE(review): sapply() is kept because MRCA()'s return type is not visible
# here; switch to vapply() once it is confirmed.
clades <- sapply(g, function(n) MRCA(p, n))
# groupClade colours the branches according to the classification results.
# This is based on the cluster analysis: samples on branches of the same
# colour have similar characteristics.
p <- groupClade(p, clades, group_name = "group") + aes(color = group)
p
Code
# Tip labels could show a unique name for each sample. This data set has
# none: the samples are many individual flowers and we are not interested in
# any single one. In a study such as our city-level classification, a
# meaningful label could be written here instead.
d <- data.frame(
  label = seq_len(nrow(iris)),
  Species = iris[, "Species"]
)
- layout_dendrogram() to layout the tree top-down, and theme_dendrogram() to display tree height.
- `%<+%` is used like `%>%`: it attaches the data frame on its right to the tree plot on its left.
- `geom_tippoint()`: sets the shape and color of the tip (end) nodes.
Code
# Attach the species table to the tree and draw it as a top-down dendrogram:
# tip points filled by species, species names and sample labels at the tips,
# and a small legend inside the plotting area.
p <- p %<+% d +
  layout_dendrogram() +
  geom_tippoint(
    aes(fill = Species, x = x + .5),
    size = 2, shape = 21, color = "black"
  ) +
  geom_tiplab(
    aes(label = Species),
    cex = 0.5, size = 1, hjust = .5, color = "black"
  ) +
  geom_tiplab(
    angle = 90, hjust = 1, cex = 0.5, size = 1,
    offset = -2, show.legend = FALSE
  ) +
  scale_color_brewer(palette = "Set1", breaks = 1:4) +
  theme_dendrogram(plot.margin = margin(6, 6, 80, 6)) +
  theme(
    legend.position = c(.95, .75),
    legend.background = element_rect(size = 0.2),
    legend.text = element_text(size = 2),
    legend.title = element_text(size = 2)
  ) +
  labs(title = "Dendrogram of the clustering results")
p
theme_tree: adds a background color. Maybe green would be good for your eyesight?

Do it by another way
Code
# Same clustering drawn with a circular layout, dashed branches and a pink
# background.
p <- ggtree(hc, size = 0.8, linetype = "dashed", layout = "circular") %>%
  groupClade(clades, group_name = "group") +
  aes(color = group)

p <- p %<+% d +
  geom_tippoint(
    aes(fill = Species, x = x + .5),
    size = 2, shape = 21, color = "black"
  ) +
  geom_tiplab(
    aes(label = Species),
    cex = 0.8, size = 1, hjust = .5, color = "black"
  ) +
  geom_tiplab(
    angle = 90, hjust = 1, cex = 0.5, size = 2,
    offset = -2, show.legend = FALSE
  ) +
  scale_color_brewer(palette = "Set1", breaks = 1:4) +
  theme_dendrogram(plot.margin = margin(6, 6, 80, 6)) +
  theme(
    legend.position = c(.95, .75),
    legend.background = element_rect(size = 0.5),
    legend.text = element_text(size = 5),
    legend.title = element_text(size = 5)
  ) +
  labs(title = "Dendrogram of the clustering results") +
  theme_tree("#FEE4E9")
p


